// .........................................................................
// Title: sovereign_data.do
//
// Imports and processes data on sovereign bond yields and returns, including
// data on US Treasuries
// .........................................................................

* --------------------------------------
* Treasury returns from CRSP
* --------------------------------------

* filtering the crsp sample
use "$raw/crsp/treasuries/tfz_dly_a", clear
rename (TDDURATN TDNOMPRC CALDT) (tdduratn tdnomprc date)
drop if tdduratn < 0
gen duration = round(tdduratn / 30)
drop if missing(duration)
keep if date >= td(01jan2000)

* keep only regular non-callable issues
keep if inlist(ITYPE, 1, 2, 4)

* only keep if prices are available
keep if TDNOMPRC_FLG == "M"
drop if tdnomprc == 0

* drop fields that are not necessary and do renaming
drop TDBID TDASK TDNOMPRC_FLG TDSOURCR TDPUBOUT TDTOTOUT KYTREASNO TCUSIP TDATDT TMATDT CRSPID TSTRIPELIG
drop IWHY TCOUPRT TNIPPY TVALFC TFCPDT IFCPDTF TFCALDT TNOTICE IYMCN ITYPE IUNIQ ITAX IFLWR TBANKDT TFRGNTGT
rename KYCRSPID crsp_id
rename TDACCINT tot_acc_int
rename TDPDINT paid_int
rename TDYLD yield_tm
rename TDRETNUA unadjusted_return

* save daily data
order crsp_id date duration tdduratn
gsort crsp_id date
save "$tmp/crsp_tfz_filtered_d", replace

* sample down to a weekly frequency
use "$tmp/crsp_tfz_filtered_d", clear
gen date_w = wofd(date)
format %tw date_w
gsort crsp_id date_w date

* convert daily fields to weekly
gen log_unadjusted_return = log(1 + unadjusted_return)
by crsp_id date_w: egen cl_return = sum(log_unadjusted_return)
gen weekly_unadjusted_return = exp(cl_return) - 1
by crsp_id date_w: egen weekly_paid_int = total(paid_int)
by crsp_id date_w: egen weekly_tot_acc_int = total(tot_acc_int)
by crsp_id date_w: keep if _n == _N
drop unadjusted_return cl_return log_unadjusted_return paid_int tot_acc_int date
rename weekly_* *

* save weekly data
order crsp_id date_w duration tdduratn
gsort crsp_id date_w
save "$tmp/crsp_tfz_filtered_w", replace

* average returns on individual treasuries
use "$tmp/crsp_tfz_filtered_w", clear
gcollapse (mean) unadjusted_return, by(date_w duration)
rename unadjusted_return return

* interpolate over durations if missing
xtset date_w duration
tsfill
gsort date_w duration
by date_w: ipolate return duration, gen(_return)
replace return = _return if missing(return)
drop _return

* data gets too sparse above 200 month duration
drop if duration > 200

* also interpolate over any missing dates
gsort duration date_w
xtset duration date_w
tsfill
by duration: ipolate return date_w, gen(_return)
replace return = _return if missing(return)
drop _return
rename return treasury_return

* construct return index (weekly)
order duration date_w
gsort duration date_w
gen log_return = log(1 + treasury_return)
by duration: gen cl_return = 1 + sum(log_return)
gen return_index = exp(cl_return) - 1
drop log_return cl_return
rename return_index tri
save "$tmp/crsp_treasury_returns_w", replace

* create monthly version as well
use "$tmp/crsp_treasury_returns_w", clear
gen date_m = mofd(dofw(date_w))
format %tm date_m
gen log_return = log(1 + treasury_return)
gcollapse (sum) log_return, by(date_m duration)
gen treasury_return = exp(log_return) - 1
drop log_return

* construct return index (monthly)
order duration date_m
gsort duration date_m
gen log_return = log(1 + treasury_return)
by duration: gen cl_return = 1 + sum(log_return)
gen return_index = exp(cl_return) - 1
drop log_return cl_return
rename return_index tri
save "$tmp/crsp_treasury_returns_m", replace

* --------------------------------------
* Treasury yields from CRSP
* --------------------------------------

* treasury yields by duration
use "$tmp/crsp_tfz_filtered_w", clear
gcollapse (mean) yield_tm, by(date_w duration)

* interpolate over durations if missing
xtset date_w duration
tsfill
gsort date_w duration
by date_w: ipolate yield_tm duration, gen(_yield_tm)
replace yield_tm = _yield_tm if missing(yield_tm)
drop _yield_tm

* data gets too sparse above 200 month duration
drop if duration > 200

* also interpolate over any missing dates
gsort duration date_w
xtset duration date_w
tsfill
by duration: ipolate yield_tm date_w, gen(_yield_tm)
replace yield_tm = _yield_tm if missing(yield_tm)
drop _yield_tm
replace yield_tm = yield_tm * 360
save "$tmp/crsp_treasury_yields_w", replace

* weekly frequency
use "$tmp/crsp_treasury_yields_w", clear
gen date_q = qofd(dofw(date_w))
format %tq date_q
gsort duration date_q date_w
by duration date_q: keep if _n == _N
drop date_w
order duration date_q
rename yield_tm treasury_yield
winsor2 treasury_yield, by(duration)
save "$tmp/crsp_treasury_yields_q", replace

* monthly frequency
use "$tmp/crsp_treasury_yields_w", clear
gen date_m = mofd(dofw(date_w))
format %tm date_m
gsort duration date_m date_w
by duration date_m: keep if _n == _N
drop date_w
order duration date_m
rename yield_tm treasury_yield
winsor2 treasury_yield, by(duration)
save "$tmp/crsp_treasury_yields_m", replace

* --------------------------------------
* Constant-maturity benchmarks
* --------------------------------------

* import raw excel
forval term=1/30 {
    import excel using "$raw/factset/sovereign_benchmarks/USA_`term'Y.xlsx", clear firstrow cellrange(A16)
    keep Date TotalReturnGrossUnhedged CumulativeReturnGrossUnhe Price
    gen term = "`term'Y"
    save "$tmp/cmb_USA_`term'Y_w", replace
}

* append
clear
forval term=1/30 {
    append using "$tmp/cmb_USA_`term'Y_w"
}
gen date_w = wofd(Date)
format %tw date_w
replace term = subinstr(term, "Y", "", .)
destring term, replace
gen term_m = term * 12
order term_m date_w
drop term
rename Date date
rename TotalReturnGrossUnhedged total_return_index
rename CumulativeReturnGrossUnhe cumulative_return_index
drop if missing(cumulative_return_index) & total_return_index == 0
rename Price price
drop if date_w < tw(1999w52)
gsort term_m date_w date
by term_m date_w: keep if _n == _N
save "$tmp/factset_cm_treasury", replace

* interpolate (weekly)
use "$tmp/factset_cm_treasury", clear
xtset term_m date_w
gsort term_m date_w
gen return = total_return_index / L.total_return_index - 1
drop cumulative_return_index price date
rename total_return_index tri
reshape wide return tri, i(date_w) j(term_m)
forval i=0/360 {
    cap gen return`i' = .
    cap gen tri`i' = .
}
forval i=0/11 {
    replace return`i' = return12
    replace tri`i' = tri12
}
reshape long
gsort date_w term_m
by date_w: ipolate return term_m, gen(_return)
by date_w: ipolate tri term_m, gen(_tri)
replace return = _return if missing(return)
replace tri = _tri if missing(tri)
drop _*
rename term_m duration
rename return treasury_return
save "$tmp/treasury_returns_cm_w", replace

* interpolate (monthly)
use "$tmp/factset_cm_treasury", clear
gen date_m = mofd(date)
gsort term_m date_m date
by term_m date_m: keep if _n == _N
xtset term_m date_m
gsort term_m date_m
gen return = total_return_index / L.total_return_index - 1
drop cumulative_return_index price date date_w
rename total_return_index tri
reshape wide return tri, i(date_m) j(term_m)
forval i=0/360 {
    cap gen return`i' = .
    cap gen tri`i' = .
}
forval i=0/11 {
    replace return`i' = return12
    replace tri`i' = tri12
}
reshape long
gsort date_m term_m
by date_m: ipolate return term_m, gen(_return)
by date_m: ipolate tri term_m, gen(_tri)
replace return = _return if missing(return)
replace tri = _tri if missing(tri)
drop _*
rename term_m duration
rename return treasury_return
save "$tmp/treasury_returns_cm_m", replace

* create merged version of treasury returns
use "$tmp/treasury_returns_cm_w", clear
keep if date_w >= tw(2000w1)
drop if duration <= 200
append using "$tmp/crsp_treasury_returns_w"
gsort duration date_w
save "$tmp/treasury_returns_merged_w", replace

use "$tmp/treasury_returns_cm_m", clear
keep if date_m >= tw(2000w1)
drop if duration <= 200
append using "$tmp/crsp_treasury_returns_m"
gsort duration date_m
save "$tmp/treasury_returns_merged_m", replace

* --------------------------------------
* Non-US yields: weekly
* --------------------------------------

* import factset constant-maturity benchmarks
foreach country in "DEU" "CAN" "GBR" {
foreach term in "1Y" "2Y" "3Y" "5Y" "7Y" "10Y" "20Y" "30Y" {
    import excel using "$raw/factset/sovereign_benchmarks/`country'_`term'.xlsx", clear firstrow cellrange(A16)
    keep Date TotalReturnGrossUnhedged CumulativeReturnGrossUnhe
    gen country = "`country'"
    gen term = "`term'"
    save "$tmp/cmb_`country'_`term'_w", replace
}
}

clear
foreach country in "DEU" "CAN" "GBR" {
foreach term in "1Y" "2Y" "3Y" "5Y" "7Y" "10Y" "20Y" "30Y" {
    append using "$tmp/cmb_`country'_`term'_w"
}
}
gen date_w = wofd(Date)
format %tw date_w
replace term = subinstr(term, "Y", "", .)
destring term, replace
gen term_m = term * 12
order country term_m date_w
drop Date term
rename TotalReturnGrossUnhedged total_return_index
rename CumulativeReturnGrossUnhe cumulative_return_index
drop if missing(cumulative_return_index) & total_return_index == 0

* construct returns
drop if date_w < tw(2006w52)
egen index_id = group(country term_m)
gsort index_id date_w
by index_id date_w: keep if _n == _N
xtset index_id date_w
gsort index_id date_w
gen return = total_return_index / L.total_return_index - 1
drop if missing(return)
keep country term_m date_w return

* interpolate sovereign returns from constant-maturity benchmarks
reshape wide return, i(country date_w) j(term_m)
forval i=0/360 {
    cap gen return`i' = .
}
forval i=0/11 {
    replace return`i' = return12
}
reshape long
gsort country date_w term_m
by country date_w: ipolate return term_m, gen(_return)
replace return = _return if missing(return)
drop _return
rename term_m duration
rename return sovereign_return
save "$tmp/sovereign_returns_nonus_cmb_w", replace
